#############################################################
#################      Non-Parametric RDD        #############
###############      October 10, 2018     ##################
########  Rerun and fixed some issues: Dec 16, 2022


#####This code consists of the codes from several sources.
#####    1. The bootstrap functions are taken from the following source:
####      http://www.stat.wisc.edu/~larget/stat302/chap3.pdf
####     2. The script for the non-parametric point estimation is taken from the 
####      Keele and Titiunik (2015) replication materials.


rm(list=ls())
library(plyr)
library(readstata13)
library(multiwayvcov)
library(sandwich)
library(lmtest)
library(stargazer)
library(ggplot2)
source("~/Dropbox/Personal Research 2017/replications/distance-functions.R")  #load distance function from another file
options(width=300)
library(foreign)
library(rdrobust)
library(fields)

###################################################
####### Define Bootstrap Function #################
#####Functon is taken from the website:
####http://www.stat.wisc.edu/~larget/stat302/chap3.pdf

boot.mean = function(x,B,binwidth=NULL) {
  n = length(x)
  boot.samples = matrix( sample(x,size=n*B,replace=TRUE), B, n)
  boot.statistics = apply(boot.samples,1,mean)
  se = sd(boot.statistics)
  require(ggplot2)
  if ( is.null(binwidth) )
    binwidth = diff(range(boot.statistics))/30
  p = ggplot(data.frame(x=boot.statistics),aes(x=x)) +
    geom_histogram(aes(y=..density..),binwidth=binwidth) + geom_density(color="red")
  #plot(p)
  interval = mean(x) + c(-1,1)*2*se
  #print( interval )
  return( list(boot.mean=mean(x), boot.statistics = boot.statistics, interval=interval, se=se) )
}


######################################

# setwd("~/Dropbox/Personal Research 2017")  #set wd
dat=read.csv("~/Dropbox/Personal Research 2017/replications/karn_nov16.csv") #load the file

set.seed(113) #for the randomization - set the seed

summary(dat$Latitude)
summary(dat$Longitude)
summary(dat$border1)     #mysore-bombay
summary(dat$border2)     #hyd-bombay
summary(dat$pucca_binary)
band=20


pointsALL<- read.dbf("~/Dropbox/Personal Research 2017/replications/mysborderpts.dbf") #load the points on the border
dim(pointsALL)
names(pointsALL)
pointsALL$latitude= pointsALL$Latitude
pointsALL$longitude= -pointsALL$Longitude



## Look at latitude and longitude ( for this town lat: 43, long:87)
range(pointsALL$latitude)
range(pointsALL$longitude)

data=dat[which(dat$Latitude!="NA"|dat$Longitude!="NA"),]

data$longitude=-data$Longitude
data$latitude=data$Latitude
data$treat=data$border1
summary(data$treat)

range(data$latitude)
range(data$longitude)



#health 
##########################################
#
# Define outcomes of analysis
#
#########################################
outcomes <- c("health_binary") #substitute the necessary outcome


##########################################
#
# Use same fix bandwidth for every point
#
#########################################
# define points
z = c(1:nrow(pointsALL))
#z = c(20, 75, 150, 225)
cat("We only include points below \n")
print(z)
points = pointsALL[z,]


# use fixed bandwidth for every point and outcome
hfix = band
hlist =  vector("list", length(outcomes))
for(j in 1:length(outcomes)) hlist[[j]] = rep(hfix, length(z))

##########################################
#
# Estimate yhat for treatments and controls separately, using fixed bandwidth, for every outcome
#
######################################### 
## Set up parameters for estimation
dislimit10per = 1.00
dislimitmin   = 0.50 
minobs = 10
np = nrow(points)



colnm2 = c("Point", "Estimate", "Conv-CIl",  "Conv-CIu", "hfix", "Ntr-hfix", "Nco-hfix", "Rob-CIl",  "Rob-CIu")
finaltab = as.data.frame(matrix(data=NA, ncol = length(colnm2), nrow = np * length(outcomes), dimnames = list(NULL, colnm2)))



for(j in 1:length(outcomes)) {
  t0 = proc.time()[3]
  cat("/******************************************************************************\n")
  cat("Starting estimation of variable", outcomes[j], "\n")
  cat("/******************************************************************************\n")    
  ## Set up data
  datause = data[complete.cases(data$latitude, data$longitude, data$treat, data[,c(outcomes[j])]),]
  
  ## Get covariates: latitude and longitude
  x1 <- datause$latitude
  x2 <- datause$longitude
  # Get outcome
  y <- datause[,c(outcomes[j])]
  treat <- datause$treat
  #table(datause$countyname, datause$treat)
  
  ## Set up treated data
  indx = (treat == 1)
  sum(indx)
  x1.Tr <- x1[indx]
  x2.Tr <- x2[indx]
  y.Tr  <- y[indx]
  rm(indx)
  gc()
  
  ## Set up control data
  indx = (treat == 0)
  sum(indx)
  x1.Co <- x1[indx]
  x2.Co <- x2[indx]
  y.Co  <- y[indx]
  rm(indx)
  gc()
  
  
  for(i in 1:np) {
    cat("Point", i, "of", np, "\n")
    b1 = points$latitude[i]
    b2 = points$longitude[i]
    h = hlist[[j]][i]
    
    
    ####################################
    #### Calculate score in treated area
    ####################################
    
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Tr, lon2 = x2.Tr)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disTr = dis
    cat("Minimum treated distance is ",min(dis), "\n")
    
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))     
    wTr=w
    npTr = sum(w>0)
    cat("There are", npTr, "Tr observations with positive weights out of", length(y.Tr), "Tr obs \n")  
    
    ####################################
    #### Calculate score in control area
    ####################################
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Co, lon2 = x2.Co)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disCo = dis
    cat("Minimum control distance is ",min(dis), "\n")   
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))    
    wCo=w
    npCo = sum(w>0)
    cat("There are", npCo, "Co observations with positive weights out of", length(y.Co), "Co obs \n")  
    
    
    score = c(disTr, -disCo)
    y = c(y.Tr, y.Co)
    
    if(npTr >= minobs & npCo >= minobs) {
      #rdout = rdrobust(y = y , x = score , c = 0 , h = h)
      #print(rdout)
      #rdout.Coef = summary(rdout)
      
      rdoutCCT = rdrobust(y = y ,x = score , c = 0 , bwselect = "mserd")
      print(rdoutCCT)
      hCCT = rdoutCCT$bws[1]          
      rdoutCCT.Coef = summary(rdoutCCT)
      rdoutCCT$sum
      
      #rdoutIK  = rdrobust(y = y ,x = score , c = 0 , bwselect = "cerrd")
      #print(rdoutIK)
      #hIK = rdoutIK$bws[1]
      #rdoutIK.Coef = summary(rdoutIK)
    }
    
    ################################
    # Store results
    #################################
    finaltab[i,"Point"]         = i 
    finaltab[i, "Estimate"]     = round(rdoutCCT$coef[1], 2)
    finaltab[i, "Conv-CIl"]     = round(rdoutCCT$ci[1,1], 2)
    finaltab[i,  "Conv-CIu"]    = round(rdoutCCT$ci[1,2], 2)
    finaltab[i, "hfix"]         = hfix
    finaltab[i, "Ntr-hfix"]     = npTr
    finaltab[i, "Nco-hfix"]     = npCo
    finaltab[i, "Rob-CIl"]      = round(rdoutCCT$ci[3,1], 2)
    finaltab[i, "Rob-CIu"]      = round(rdoutCCT$ci[3,2], 2)
    #finaltab[i,"hCCT"]          = round(hCCT, 2)
    #finaltab[i, "Ntr-hCCT"]     = rdoutCCT$sum[1,2]
    #finaltab[i,"Nco-hCCT"]      = rdoutCCT$sum[1,1]
  }   
  
  cat("\n\n\n")    
  cat("-------------------------------------------------------\n")
  cat("Final results -- Outcome", outcomes[j], "\n")
  print(finaltab)
  cat("-------------------------------------------------------\n")    
}# end iterating over outcomes

library(xtable)
xtable(finaltab)
finaltab[2]
apply(finaltab[2], 2, mean)

n=nrow(pointsALL)
B=1000
x=as.matrix(finaltab[2])

boot.mean = function(x,B,binwidth=NULL) {
  n = length(x)
  boot.samples = matrix( sample(x,size=n*B,replace=TRUE), B, n)
  boot.statistics = apply(boot.samples,1,mean)
  se = sd(boot.statistics)
  require(ggplot2)
  if ( is.null(binwidth) )
    binwidth = diff(range(boot.statistics))/30
  p = ggplot(data.frame(x=boot.statistics),aes(x=x)) +
    geom_histogram(aes(y=..density..),binwidth=binwidth) + geom_density(color="red")
  plot(p)
  interval = mean(x) + c(-1,1)*2*se
  #print( interval )
  return( list(boot.mean=mean(x), boot.statistics = boot.statistics, interval=interval, se=se, plot=p) )
}

mean2= boot.mean(x, B)$boot.mean
ci2=boot.mean(x, B)$interval



#pucca 
##########################################
#
# Define outcomes of analysis
#
#########################################
outcomes <- c("pucca_binary") #substitute the necessary outcome


##########################################
#
# Use same fix bandwidth for every point
#
#########################################
# define points
z = c(1:nrow(pointsALL))
#z = c(20, 75, 150, 225)
cat("We only include points below \n")
print(z)
points = pointsALL[z,]


# use fixed bandwidth for every point and outcome
hfix = band
hlist =  vector("list", length(outcomes))
for(j in 1:length(outcomes)) hlist[[j]] = rep(hfix, length(z))

##########################################
#
# Estimate yhat for treatments and controls separately, using fixed bandwidth, for every outcome
#
######################################### 
## Set up parameters for estimation
dislimit10per = 1.00
dislimitmin   = 0.50 
minobs = 10
np = nrow(points)





colnm2 = c("Point", "Estimate", "Conv-CIl",  "Conv-CIu", "hfix", "Ntr-hfix", "Nco-hfix", "Rob-CIl",  "Rob-CIu")
finaltab = as.data.frame(matrix(data=NA, ncol = length(colnm2), nrow = np * length(outcomes), dimnames = list(NULL, colnm2)))



for(j in 1:length(outcomes)) {
  t0 = proc.time()[3]
  cat("/******************************************************************************\n")
  cat("Starting estimation of variable", outcomes[j], "\n")
  cat("/******************************************************************************\n")    
  ## Set up data
  datause = data[complete.cases(data$latitude, data$longitude, data$treat, data[,c(outcomes[j])]),]
  
  ## Get covariates: latitude and longitude
  x1 <- datause$latitude
  x2 <- datause$longitude
  # Get outcome
  y <- datause[,c(outcomes[j])]
  treat <- datause$treat
  #table(datause$countyname, datause$treat)
  
  ## Set up treated data
  indx = (treat == 1)
  sum(indx)
  x1.Tr <- x1[indx]
  x2.Tr <- x2[indx]
  y.Tr  <- y[indx]
  rm(indx)
  gc()
  
  ## Set up control data
  indx = (treat == 0)
  sum(indx)
  x1.Co <- x1[indx]
  x2.Co <- x2[indx]
  y.Co  <- y[indx]
  rm(indx)
  gc()
  
  
  for(i in 1:np) {
    cat("Point", i, "of", np, "\n")
    b1 = points$latitude[i]
    b2 = points$longitude[i]
    h = hlist[[j]][i]
    
    
    ####################################
    #### Calculate score in treated area
    ####################################
    
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Tr, lon2 = x2.Tr)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disTr = dis
    cat("Minimum treated distance is ",min(dis), "\n")
    
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))     
    wTr=w
    npTr = sum(w>0)
    cat("There are", npTr, "Tr observations with positive weights out of", length(y.Tr), "Tr obs \n")  
    
    ####################################
    #### Calculate score in control area
    ####################################
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Co, lon2 = x2.Co)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disCo = dis
    cat("Minimum control distance is ",min(dis), "\n")   
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))    
    wCo=w
    npCo = sum(w>0)
    cat("There are", npCo, "Co observations with positive weights out of", length(y.Co), "Co obs \n")  
    
    
    score = c(disTr, -disCo)
    y = c(y.Tr, y.Co)
    
    if(npTr >= minobs & npCo >= minobs) {
      #rdout = rdrobust(y = y , x = score , c = 0 , h = h)
      #print(rdout)
      #rdout.Coef = summary(rdout)
      
      rdoutCCT = rdrobust(y = y ,x = score , c = 0 , bwselect = "mserd")
      print(rdoutCCT)
      hCCT = rdoutCCT$bws[1]          
      rdoutCCT.Coef = summary(rdoutCCT)
      rdoutCCT$sum
      
      #rdoutIK  = rdrobust(y = y ,x = score , c = 0 , bwselect = "cerrd")
      #print(rdoutIK)
      #hIK = rdoutIK$bws[1]
      #rdoutIK.Coef = summary(rdoutIK)
    }
    
    ################################
    # Store results
    #################################
    finaltab[i,"Point"]         = i 
    finaltab[i, "Estimate"]     = round(rdoutCCT$coef[1], 2)
    finaltab[i, "Conv-CIl"]     = round(rdoutCCT$ci[1,1], 2)
    finaltab[i,  "Conv-CIu"]    = round(rdoutCCT$ci[1,2], 2)
    finaltab[i, "hfix"]         = hfix
    finaltab[i, "Ntr-hfix"]     = npTr
    finaltab[i, "Nco-hfix"]     = npCo
    finaltab[i, "Rob-CIl"]      = round(rdoutCCT$ci[3,1], 2)
    finaltab[i, "Rob-CIu"]      = round(rdoutCCT$ci[3,2], 2)
    #finaltab[i,"hCCT"]          = round(hCCT, 2)
    #finaltab[i, "Ntr-hCCT"]     = rdoutCCT$sum[1,2]
    #finaltab[i,"Nco-hCCT"]      = rdoutCCT$sum[1,1]
  }   
  
  cat("\n\n\n")    
  cat("-------------------------------------------------------\n")
  cat("Final results -- Outcome", outcomes[j], "\n")
  print(finaltab)
  cat("-------------------------------------------------------\n")    
}# end iterating over outcomes

library(xtable)
xtable(finaltab)
finaltab[2]
apply(finaltab[2], 2, mean)

n=nrow(pointsALL)
B=1000
x=as.matrix(finaltab[2])

boot.mean = function(x,B,binwidth=NULL) {
  n = length(x)
  boot.samples = matrix( sample(x,size=n*B,replace=TRUE), B, n)
  boot.statistics = apply(boot.samples,1,mean)
  se = sd(boot.statistics)
  require(ggplot2)
  if ( is.null(binwidth) )
    binwidth = diff(range(boot.statistics))/30
  p = ggplot(data.frame(x=boot.statistics),aes(x=x)) +
    geom_histogram(aes(y=..density..),binwidth=binwidth) + geom_density(color="red")
  plot(p)
  interval = mean(x) + c(-1,1)*2*se
  #print( interval )
  return( list(boot.mean=mean(x), boot.statistics = boot.statistics, interval=interval, se=se, plot=p) )
}

mean3= boot.mean(x, B)$boot.mean
ci3=boot.mean(x, B)$interval


#mys 
res2=c(mean2, ci2)
res3=c(mean3, ci3)

mysres=rbind(res2, res3)
mysres #results of the coefficients with CI for Mysore




###################################################
#hyderabad #
######################################

summary(dat$Latitude)
summary(dat$Longitude)
summary(dat$border1)     #mysore-bombay
summary(dat$border2)     #hyd-bombay
summary(dat$pucca_binary)


pointsALL<- read.dbf("~/Dropbox/Personal Research 2017/replications/hyd_borderpts.dbf") #load the points of the border
dim(pointsALL)
names(pointsALL)
pointsALL$latitude= pointsALL$Latitude
pointsALL$longitude= -pointsALL$Longitude


## Look at latitude and longitude ( for this town lat: 43, long:87)
range(pointsALL$latitude)
range(pointsALL$longitude)

data=dat[which(dat$Latitude!="NA"|dat$Longitude!="NA"),]

data$longitude=-data$Longitude
data$latitude=data$Latitude
data$treat=data$border2
summary(data$treat)

range(data$latitude)
range(data$longitude)



#health 
##########################################
#
# Define outcomes of analysis
#
#########################################
outcomes <- c("health_binary") #substitute the necessary outcome


##########################################
#
# Use same fix bandwidth for every point
#
#########################################
# define points
z = c(1:nrow(pointsALL))
#z = c(20, 75, 150, 225)
cat("We only include points below \n")
print(z)
points = pointsALL[z,]


# use fixed bandwidth for every point and outcome
hfix = band
hlist =  vector("list", length(outcomes))
for(j in 1:length(outcomes)) hlist[[j]] = rep(hfix, length(z))

##########################################
#
# Estimate yhat for treatments and controls separately, using fixed bandwidth, for every outcome
#
######################################### 
## Set up parameters for estimation
dislimit10per = 1.00
dislimitmin   = 0.50 
minobs = 10
np = nrow(points)





colnm2 = c("Point", "Estimate", "Conv-CIl",  "Conv-CIu", "hfix", "Ntr-hfix", "Nco-hfix", "Rob-CIl",  "Rob-CIu")
finaltab = as.data.frame(matrix(data=NA, ncol = length(colnm2), nrow = np * length(outcomes), dimnames = list(NULL, colnm2)))



for(j in 1:length(outcomes)) {
  t0 = proc.time()[3]
  cat("/******************************************************************************\n")
  cat("Starting estimation of variable", outcomes[j], "\n")
  cat("/******************************************************************************\n")    
  ## Set up data
  datause = data[complete.cases(data$latitude, data$longitude, data$treat, data[,c(outcomes[j])]),]
  
  ## Get covariates: latitude and longitude
  x1 <- datause$latitude
  x2 <- datause$longitude
  # Get outcome
  y <- datause[,c(outcomes[j])]
  treat <- datause$treat
  #table(datause$countyname, datause$treat)
  
  ## Set up treated data
  indx = (treat == 1)
  sum(indx)
  x1.Tr <- x1[indx]
  x2.Tr <- x2[indx]
  y.Tr  <- y[indx]
  rm(indx)
  gc()
  
  ## Set up control data
  indx = (treat == 0)
  sum(indx)
  x1.Co <- x1[indx]
  x2.Co <- x2[indx]
  y.Co  <- y[indx]
  rm(indx)
  gc()
  
  
  for(i in 1:np) {
    cat("Point", i, "of", np, "\n")
    b1 = points$latitude[i]
    b2 = points$longitude[i]
    h = hlist[[j]][i]
    
    
    ####################################
    #### Calculate score in treated area
    ####################################
    
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Tr, lon2 = x2.Tr)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disTr = dis
    cat("Minimum treated distance is ",min(dis), "\n")
    
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))     
    wTr=w
    npTr = sum(w>0)
    cat("There are", npTr, "Tr observations with positive weights out of", length(y.Tr), "Tr obs \n")  
    
    ####################################
    #### Calculate score in control area
    ####################################
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Co, lon2 = x2.Co)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disCo = dis
    cat("Minimum control distance is ",min(dis), "\n")   
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))    
    wCo=w
    npCo = sum(w>0)
    cat("There are", npCo, "Co observations with positive weights out of", length(y.Co), "Co obs \n")  
    
    
    score = c(disTr, -disCo)
    y = c(y.Tr, y.Co)
    
    if(npTr >= minobs & npCo >= minobs) {
      #rdout = rdrobust(y = y , x = score , c = 0 , h = h)
      #print(rdout)
      #rdout.Coef = summary(rdout)
      
      rdoutCCT = rdrobust(y = y ,x = score , c = 0 , bwselect = "mserd")
      print(rdoutCCT)
      hCCT = rdoutCCT$bws[1]          
      rdoutCCT.Coef = summary(rdoutCCT)
      rdoutCCT$sum
      
      #rdoutIK  = rdrobust(y = y ,x = score , c = 0 , bwselect = "cerrd")
      #print(rdoutIK)
      #hIK = rdoutIK$bws[1]
      #rdoutIK.Coef = summary(rdoutIK)
    }
    
    ################################
    # Store results
    #################################
    finaltab[i,"Point"]         = i 
    finaltab[i, "Estimate"]     = round(rdoutCCT$coef[1], 2)
    finaltab[i, "Conv-CIl"]     = round(rdoutCCT$ci[1,1], 2)
    finaltab[i,  "Conv-CIu"]    = round(rdoutCCT$ci[1,2], 2)
    finaltab[i, "hfix"]         = hfix
    finaltab[i, "Ntr-hfix"]     = npTr
    finaltab[i, "Nco-hfix"]     = npCo
    finaltab[i, "Rob-CIl"]      = round(rdoutCCT$ci[3,1], 2)
    finaltab[i, "Rob-CIu"]      = round(rdoutCCT$ci[3,2], 2)
    #finaltab[i,"hCCT"]          = round(hCCT, 2)
    #finaltab[i, "Ntr-hCCT"]     = rdoutCCT$sum[1,2]
    #finaltab[i,"Nco-hCCT"]      = rdoutCCT$sum[1,1]
  }   
  
  cat("\n\n\n")    
  cat("-------------------------------------------------------\n")
  cat("Final results -- Outcome", outcomes[j], "\n")
  print(finaltab)
  cat("-------------------------------------------------------\n")    
}# end iterating over outcomes

library(xtable)
xtable(finaltab)
finaltab[2]
apply(finaltab[2], 2, mean)

n=nrow(pointsALL)
B=1000
x=as.matrix(finaltab[2])

boot.mean = function(x,B,binwidth=NULL) {
  n = length(x)
  boot.samples = matrix( sample(x,size=n*B,replace=TRUE), B, n)
  boot.statistics = apply(boot.samples,1,mean)
  se = sd(boot.statistics)
  require(ggplot2)
  if ( is.null(binwidth) )
    binwidth = diff(range(boot.statistics))/30
  p = ggplot(data.frame(x=boot.statistics),aes(x=x)) +
    geom_histogram(aes(y=..density..),binwidth=binwidth) + geom_density(color="red")
  plot(p)
  interval = mean(x) + c(-1,1)*2*se
  #print( interval )
  return( list(boot.mean=mean(x), boot.statistics = boot.statistics, interval=interval, se=se, plot=p) )
}

mean6= boot.mean(x, B)$boot.mean
ci6=boot.mean(x, B)$interval



#pucca 
##########################################
#
# Define outcomes of analysis
#
#########################################
outcomes <- c("pucca_binary") #substitute the necessary outcome


##########################################
#
# Use same fix bandwidth for every point
#
#########################################
# define points
z = c(1:nrow(pointsALL))
#z = c(20, 75, 150, 225)
cat("We only include points below \n")
print(z)
points = pointsALL[z,]


# use fixed bandwidth for every point and outcome
hfix = band
hlist =  vector("list", length(outcomes))
for(j in 1:length(outcomes)) hlist[[j]] = rep(hfix, length(z))

##########################################
#
# Estimate yhat for treatments and controls separately, using fixed bandwidth, for every outcome
#
######################################### 
## Set up parameters for estimation
dislimit10per = 1.00
dislimitmin   = 0.50 
minobs = 10
np = nrow(points)


colnm2 = c("Point", "Estimate", "Conv-CIl",  "Conv-CIu", "hfix", "Ntr-hfix", "Nco-hfix", "Rob-CIl",  "Rob-CIu")
finaltab = as.data.frame(matrix(data=NA, ncol = length(colnm2), nrow = np * length(outcomes), dimnames = list(NULL, colnm2)))



for(j in 1:length(outcomes)) {
  t0 = proc.time()[3]
  cat("/******************************************************************************\n")
  cat("Starting estimation of variable", outcomes[j], "\n")
  cat("/******************************************************************************\n")    
  ## Set up data
  datause = data[complete.cases(data$latitude, data$longitude, data$treat, data[,c(outcomes[j])]),]
  
  ## Get covariates: latitude and longitude
  x1 <- datause$latitude
  x2 <- datause$longitude
  # Get outcome
  y <- datause[,c(outcomes[j])]
  treat <- datause$treat
  #table(datause$countyname, datause$treat)
  
  ## Set up treated data
  indx = (treat == 1)
  sum(indx)
  x1.Tr <- x1[indx]
  x2.Tr <- x2[indx]
  y.Tr  <- y[indx]
  rm(indx)
  gc()
  
  ## Set up control data
  indx = (treat == 0)
  sum(indx)
  x1.Co <- x1[indx]
  x2.Co <- x2[indx]
  y.Co  <- y[indx]
  rm(indx)
  gc()
  
  
  for(i in 1:np) {
    cat("Point", i, "of", np, "\n")
    b1 = points$latitude[i]
    b2 = points$longitude[i]
    h = hlist[[j]][i]
    
    
    ####################################
    #### Calculate score in treated area
    ####################################
    
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Tr, lon2 = x2.Tr)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disTr = dis
    cat("Minimum treated distance is ",min(dis), "\n")
    
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))     
    wTr=w
    npTr = sum(w>0)
    cat("There are", npTr, "Tr observations with positive weights out of", length(y.Tr), "Tr obs \n")  
    
    ####################################
    #### Calculate score in control area
    ####################################
    ## Calculate chordal distance between each (x1,x2) latitude-longitude pair in the data and the point of the boundary (b1,b2) where we are evaluating
    outdis = disvec(lat1=b1, lon1=b2, lat2 = x1.Co, lon2 = x2.Co)
    dis= as.vector(outdis$chord)
    summary(dis)  
    disCo = dis
    cat("Minimum control distance is ",min(dis), "\n")   
    ## Create weights using triangular kernel and fixed bandwidth 
    w = as.vector((1/h) * Kt((dis-0)/h))    
    wCo=w
    npCo = sum(w>0)
    cat("There are", npCo, "Co observations with positive weights out of", length(y.Co), "Co obs \n")  
    
    
    score = c(disTr, -disCo)
    y = c(y.Tr, y.Co)
    
    if(npTr >= minobs & npCo >= minobs) {
      #rdout = rdrobust(y = y , x = score , c = 0 , h = h)
      #print(rdout)
      #rdout.Coef = summary(rdout)
      
      rdoutCCT = rdrobust(y = y ,x = score , c = 0 , bwselect = "mserd")
      print(rdoutCCT)
      hCCT = rdoutCCT$bws[1]          
      rdoutCCT.Coef = summary(rdoutCCT)
      rdoutCCT$sum
      
      #rdoutIK  = rdrobust(y = y ,x = score , c = 0 , bwselect = "cerrd")
      #print(rdoutIK)
      #hIK = rdoutIK$bws[1]
      #rdoutIK.Coef = summary(rdoutIK)
    }
    
    ################################
    # Store results
    #################################
    finaltab[i,"Point"]         = i 
    finaltab[i, "Estimate"]     = round(rdoutCCT$coef[1], 2)
    finaltab[i, "Conv-CIl"]     = round(rdoutCCT$ci[1,1], 2)
    finaltab[i,  "Conv-CIu"]    = round(rdoutCCT$ci[1,2], 2)
    finaltab[i, "hfix"]         = hfix
    finaltab[i, "Ntr-hfix"]     = npTr
    finaltab[i, "Nco-hfix"]     = npCo
    finaltab[i, "Rob-CIl"]      = round(rdoutCCT$ci[3,1], 2)
    finaltab[i, "Rob-CIu"]      = round(rdoutCCT$ci[3,2], 2)
    #finaltab[i,"hCCT"]          = round(hCCT, 2)
    #finaltab[i, "Ntr-hCCT"]     = rdoutCCT$sum[1,2]
    #finaltab[i,"Nco-hCCT"]      = rdoutCCT$sum[1,1]
  }   
  
  cat("\n\n\n")    
  cat("-------------------------------------------------------\n")
  cat("Final results -- Outcome", outcomes[j], "\n")
  print(finaltab)
  cat("-------------------------------------------------------\n")    
}# end iterating over outcomes

library(xtable)
xtable(finaltab)
finaltab[2]
apply(finaltab[2], 2, mean)

n=nrow(pointsALL)
B=1000
x=as.matrix(finaltab[2])

boot.mean = function(x,B,binwidth=NULL) {
  n = length(x)
  boot.samples = matrix( sample(x,size=n*B,replace=TRUE), B, n)
  boot.statistics = apply(boot.samples,1,mean)
  se = sd(boot.statistics)
  require(ggplot2)
  if ( is.null(binwidth) )
    binwidth = diff(range(boot.statistics))/30
  p = ggplot(data.frame(x=boot.statistics),aes(x=x)) +
    geom_histogram(aes(y=..density..),binwidth=binwidth) + geom_density(color="red")
  plot(p)
  interval = mean(x) + c(-1,1)*2*se
  #print( interval )
  return( list(boot.mean=mean(x), boot.statistics = boot.statistics, interval=interval, se=se, plot=p) )
}

mean7= boot.mean(x, B)$boot.mean
ci7=boot.mean(x, B)$interval



#mys 
res2=c(mean2, ci2)
res3=c(mean3, ci3)


#hyd 
res6=c(mean6, ci6)
res7=c(mean7, ci7)


mysres=rbind(res2, res3) #results for mysore 
hydres=rbind(res6, res7) #results for hyderabad

results=cbind(mysres, hydres)
rownames(mysres)=c("Health Centers", "Paved Roads")
colnames(mysres)=c("Coefficient", "Lower CI", "Upper CI")
rownames(hydres)=c("Health Centers", "Paved Roads")
colnames(hydres)=c("Coefficient", "Lower CI", "Upper CI")
stargazer(round(mysres, 4)) #creates the non-parametric table for Mysore-Bombay border
stargazer(round(hydres, 4)) #creates the non-parametric table for Hyderabad-Bombay border







